# Load the house-price dataset: one row per house, columns `area` (sq ft) and `price` (USD).
import pandas as pd
# NOTE(review): hard-coded absolute Windows path — runs only on this machine; consider a relative path.
df = pd.read_csv("C:/Users/ASUS/OneDrive/Desktop/DatasetR/houseprice.csv")
# Bare expression: displays the DataFrame when run as a notebook cell.
df
|   | area | price |
|---|---|---|
| 0 | 2600 | 550000 |
| 1 | 3000 | 565000 |
| 2 | 3200 | 610000 |
| 3 | 3600 | 680000 |
| 4 | 4000 | 725000 |
# Interactive scatter plot of price against area.
import plotly.express as px
import plotly.graph_objects as go

axis_labels = {
    "area": "Area (sq ft)",
    "price": "Price (USD)",
}
fig = px.scatter(
    df,
    x="area",
    y="price",
    labels=axis_labels,
    title="Housing Price",
)
fig.show()
# Fit a one-feature ordinary linear regression: price as a function of area.
from sklearn import linear_model

model_obj = linear_model.LinearRegression()
# X must be 2-D (hence the double brackets); y is the 1-D price column.
model_obj.fit(df[["area"]], df["price"])
LinearRegression()
# Predict the price of a 3300 sq-ft house.
# The model was fitted on a DataFrame, so pass the input with the same
# column name — a bare [[3300]] triggers sklearn's
# "X does not have valid feature names" UserWarning (sklearn >= 1.0).
model_obj.predict(pd.DataFrame({"area": [3300]}))
array([628715.75342466])
# Linear Regression: y = m*x + b
# m — the slope (gradient) the model learned; coef_ holds one value per feature,
# so here it is a length-1 array.
model_obj.coef_
array([135.78767123])
# b — the intercept of the fitted line (the predicted price at area = 0).
model_obj.intercept_
180616.43835616432
# Sanity check: recompute the 3300 sq-ft prediction by hand as y = m*x + b.
# Use the fitted attributes directly instead of hard-coded copies of the
# coefficient and intercept, so this check cannot go stale if the model is refit.
model_obj.coef_[0] * 3300 + model_obj.intercept_
628715.7534151643
# Same scatter plot, but let plotly.express fit and draw a simple
# Ordinary Least Squares (OLS) trendline for price vs. area.
label_map = {
    "area": "Area (sq ft)",
    "price": "Price (USD)",
}
fig = px.scatter(
    df,
    x="area",
    y="price",
    labels=label_map,
    title="Housing Price",
    opacity=0.65,
    trendline="ols",
    trendline_color_override="darkblue",
)
fig.show()
# Gradient Descent is iterative while OLS isn't:
# Gradient Descent takes repeated steps scaled by a learning rate to approach the minimum of the cost function,
# while OLS finds that minimum analytically, by setting the partial derivatives to zero (the normal equations).